# Heterogeneity analysis ----------------------------------------------------------------------------------------------------------------

load("data/cleaned/transcripts_clean.RData", verbose = TRUE)

translations_filled

morality_analysis_clean <- read_csv("data/morality_analysis/final_analysis.csv") %>%
  rename(morality_rating = mean_morality_rating)

translations_w_morality <- translations_filled %>%
  left_join(morality_analysis_clean %>% select(group_id, pair_id, morality_rating), by = c("group_id", "pair_id"))


# Get the means (in SD units of trans and non-trans)
morality_means_z <- translations_w_morality %>% 
select(group_id, pair_id, morality_rating, r1_photo_1, r1_photo_2) %>% 
filter(!is.na(pair_id)) %>% 
distinct() %>% 
dups_report(group_id, pair_id) %>% 
mutate(pair_includes_trans = str_detect(r1_photo_1, "^T") | str_detect(r1_photo_2, "^T")) %>% 
# Convert morality rating to z-scores, using pair_includes_trans = FALSE for SD and mean
mutate(morality_rating_z = z_calc(morality_rating, mean_na(morality_rating[pair_includes_trans == FALSE]), sd(morality_rating[pair_includes_trans == FALSE], na.rm = TRUE)))

morality_means_z %>% filter(pair_includes_trans)  %>% hist_basic(morality_rating_z)
morality_means_z %>% filter(!pair_includes_trans)  %>% hist_basic(morality_rating_z)

# Get difference in means using an feols_custom model
morality_means_diff <- feols_custom(
  morality_rating_z ~ pair_includes_trans + video_type_placebo + video_type_treatment  + delivery_incentive_exp, 
data = morality_means_z %>% left_join(df %>% select(group_id, stratum_id, video_type_placebo, video_type_treatment, delivery_incentive_exp), by = "group_id") %>% mutate(pair_includes_trans = as.integer(pair_includes_trans)), 
cluster = "group_id", fixef = "stratum_id")

morality_means_diff %>% get_coeff("pair_includes_trans") %>% write_stat("outputs/stats/morality_means_diff.tex")
morality_means_diff %>% get_p_val("pair_includes_trans") %>% write_p_val("outputs/stats/morality_means_diff_p.tex")


morality_means_z %>% filter(pair_includes_trans) %>% 
get_mean("morality_rating") %>% 
write_stat("outputs/stats/morality_mean_trans.tex")

morality_means_z %>% filter(!pair_includes_trans) %>% 
get_mean("morality_rating") %>% 
write_stat("outputs/stats/morality_mean_non_trans.tex")


 morality_analysis_group_level <- translations_w_morality %>%

  # Get whether the trans was included
  tidylog::left_join(
    discuss_obs %>% select(group_id, round, group_obs_trans_included) %>% mutate(round = as.integer(round)),
    by = c("group_id", "pair_id" = "round")
  ) %>%

  # Aggregate to group level
  group_by(group_id) %>%
  summarise(
    morality_rating_trans = mean_na(morality_rating[group_obs_trans_included]),
    morality_rating_non_trans = mean_na(morality_rating[!group_obs_trans_included]),
    morality_rating = mean_na(morality_rating)
  ) %>%
  count_nas()


morality_analysis_group_level %>% hist_basic(morality_rating_trans)
morality_analysis_group_level %>% hist_basic(morality_rating_non_trans)


r2_with_morality <- r2_w_discuss_het %>%
  tidylog::left_join(morality_analysis_group_level, by = "group_id") %>%
  mutate(
    morality_z = z_calc_std(morality_rating)
  ) %>%
  tidylog::left_join(
    transcript_discussion_length, by = "group_id"
  ) %>%
  tidylog::mutate(discussion_length_words_pro_anti_trans = z_calc_std(discussion_length_words_pro_anti_trans)) %>%
  tidylog::mutate(
    across(matches("discussion_length"), z_calc_std)
  ) %>%
  count_prop(discussion_length_words_pro_anti_trans)

r2_with_morality %>% select(matches("lead")) %>% glimpse

r2_with_morality %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)) %>%
  mutate(
    across(
      c(spoke_pro_trans_group_excl, morality_rating, morality_z, morality_rating_trans, morality_rating_non_trans),
      ~ ifelse(discuss_type == "control" & is.na(.x), runif(nrow(r2_with_morality), 0, 1), .x)
    )
  ) %>%
  ggplot(aes(x = morality_rating_trans, y = r2_choose_trans, colour = discuss_type)) +
  geom_smooth(method = "lm")



lasso_options_morality <- list(
  potential_controls = control_vars,
  t = "discussion_full",
  interact = NULL,
  group_control = TRUE
)

r2_with_morality_0_control <- r2_with_morality %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  filter(!is.na(r2_choose_trans)) %>%
  mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions, morality_rating, morality_rating_trans, morality_rating_non_trans), ~ ifelse(discuss_type == "control" & is.na(.x), 0, .x))) %>%
  mutate(
    across(matches("discussion_length"), ~ ifelse(discuss_type == "control" & is.na(.x), 0, .x))
  ) %>%
  count_prop(discuss_type, discussion_length_words_pro_anti_trans)

fixef_morality <- c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase")

# Remember ~16% of transcripts are missing due to refusal, also no transcripts for listeners

list(
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full * (spoke_pro_trans_group_excl + morality_rating)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full * (spoke_pro_trans_group_excl * morality_rating)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full * (spoke_pro_trans_group_excl + morality_rating_trans + morality_rating_non_trans)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full + spoke_pro_trans_group_excl * (morality_rating_trans + morality_rating_non_trans)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full + morality_rating_trans + morality_rating_non_trans + n_r1_choose_trans  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),

  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full * (morality_rating + n_r1_choose_trans)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  )

) %>%
  tex_export(
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
  )

# Export a table ---------------------------------------------------------------------------------------------------------------

models_morality <- list(
  feols_custom(
    r2_choose_trans ~ discussion_full + morality_rating_trans + morality_rating_non_trans + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  feols_custom(
    r2_choose_trans ~ discussion_full + spoke_pro_trans_group_excl + morality_rating_trans + morality_rating_non_trans + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
  feols_custom(
    r2_choose_trans ~ discussion_full + n_r1_choose_trans + morality_rating_trans + morality_rating_non_trans + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control,
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  )
)

models_morality %>%
  set_names(rep("Dep var: Chose trans in outcome round (=1)", length(models_morality))) %>%
 tex_export(
  file = "outputs/tables/models_morality.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_reorder = c("discussion_full", "morality_rating_trans", "morality_rating_non_trans", "spoke_pro_trans_group_excl", "n_r1_choose_trans"),
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt$", "phase", "r2_reliability|item", "delivery")),
  dep_means = list(
    "Mean: No discussion (private)" = "discuss_type == 'control'"
  )
)


models_morality[[1]] %>% get_coeff("morality_rating_trans") %>% times_100 %>% write_stat("outputs/stats/morality_rating_trans.tex", digits = 0)


# Heterogeneity by discussion length ----------------------------------------------------------------------------------------------------------------

list(
"Chose trans in outcome round (=1)" = feols_custom(
    r2_choose_trans ~ discussion_full + discussion_length_words_lead_trans + discussion_length_words_lead_non_trans + discussion_length_words_trans + discussion_length_words_non_trans  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control %>% filter(discuss_type == "discussion_full"),
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  ),
"Chose trans in outcome round (=1)" = feols_custom(
    r2_choose_trans ~ discussion_full + discussion_ratio_lead_trans + discussion_ratio_lead_non_trans + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_morality_0_control %>% filter(discuss_type == "discussion_full"),
    fixef = fixef_morality, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  )
) %>%
    tex_export(
      file = "outputs/tables/enumerator_influence.tex",
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    dep_means = list(
      "Mean: No discussion (private)" = "discuss_type == 'control'"
    ),
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
  )


# Descriptive statistics on each one
transcript_discussion_length %>% get_mean(discussion_length_words_lead_trans) %>% write_stat("outputs/stats/discussion_length_words_lead_trans.tex", digits = 0)
transcript_discussion_length %>% get_mean(discussion_length_words_lead_non_trans) %>% write_stat("outputs/stats/discussion_length_words_lead_non_trans.tex", digits = 0)
transcript_discussion_length %>% get_mean(discussion_length_words_trans) %>% write_stat("outputs/stats/discussion_length_words_trans.tex", digits = 0)
transcript_discussion_length %>% get_mean(discussion_length_words_non_trans) %>% write_stat("outputs/stats/discussion_length_words_non_trans.tex", digits = 0)

transcript_discussion_length %>% get_mean(discussion_ratio_lead_trans) %>% write_percentage("outputs/stats/discussion_ratio_lead_trans.tex", digits = 0)
transcript_discussion_length %>% get_mean(discussion_ratio_lead_non_trans) %>% write_percentage("outputs/stats/discussion_ratio_lead_non_trans.tex", digits = 0)


r2_with_morality_0_control %>% 
filter(discuss_type == "discussion_full") %>%
select(matches("discussion_length|discussion_ratio")) %>% 
count_nas()



  r2_with_morality_0_control %>% count_prop(discussion_length_words_lead_non_trans)

r2_with_morality_0_control %>%
  filter(discuss_type == "discussion_full") %>%
  mutate(n_r1_choose_trans = factor(n_r1_choose_trans)) %>%
  ggplot(aes(x = discussion_length_words_pro_anti_trans, y = r2_choose_trans)) +
  geom_smooth(method = "lm")


# Is it correlated with "amount discussed"?
r2_with_morality %>%
  select(group_id, matches("amount_discussed"), matches("discussion_length")) %>%
  distinct() %>%
  select(-group_id) %>%
  corr_plot()


cor(x = r2_with_morality$amount_discussed_trans, y = r2_with_morality$discussion_length_words_pro_anti_trans, use = "pairwise.complete.obs")
# Only 0.1 correlation... not much


# TRY A GRAPH
r2_with_morality %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(
    high_morality = morality_rating > median_na(morality_rating)
  ) %>%
  filter(!(is.na(high_morality) & discuss_type == "discussion_full")) %>%
  mutate(
    spoke_pro_trans_group_excl = ifelse(is.na(spoke_pro_trans_group_excl) & discuss_type == "control", runif(nrow(.), 0, 1), spoke_pro_trans_group_excl)
  ) %>%
  ggplot(aes(x = spoke_pro_trans_group_excl, y = r2_choose_trans, colour = fct_cross(factor(discuss_type), fct_explicit_na(factor(high_morality))))) +
  geom_smooth(method = "lm")


# Morality effect on SOBs ----------------------------------------------------------------------------------------------------------------

group_predic %>% ungroup %>% dups_report(ind_id)


group_predic_w_morality <- group_predic %>%
  tidylog::left_join(gen_att_in_discussion, by = "ind_id", suffix = c("", "_extra")) %>%
  tidylog::left_join(spoke_in_favour_discuss, by = "ind_id", suffix = c("", "_extra")) %>%
  tidylog::left_join(discuss_obs_per_group %>% select(group_id, p_pos_mentions, p_neg_mentions), by = c("group_id")) %>%
  tidylog::left_join(r1_n_choose_trans, by = "ind_id", suffix = c("", "_extra")) %>%
  mutate(
    favourable_discussion_z = ((spoke_pro_trans_z + gobs9_z) / 2),
    favourable_discussion_z_group_excl = ((spoke_pro_trans_z_group_excl + gobs9_z_group_excl) / 2)
  ) %>%
  tidylog::left_join(morality_analysis_group_level, by = "group_id") %>%
  mutate(
    morality_z = z_calc_std(morality_rating)
  )


group_predic_w_morality_0_control <- group_predic_w_morality %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  filter(!is.na(group_predic_choose_trans)) %>%
  mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions, morality_rating, morality_rating_trans, morality_rating_non_trans), ~ ifelse(discuss_type == "control" & is.na(.x), 0, .x)))

fixef_morality_group_predic <- c("stratum_id", "video_type", "delivery_incentive_exp", "phase")

morality_models_sob <- list(
  "Discussion participants" =  feols_custom(
    group_predic_choose_trans ~ discussion_full * (morality_rating_trans + morality_rating_non_trans + n_r1_choose_trans)  + video_type_placebo + video_type_treatment  + delivery_incentive_exp + item_diff + reliability_diff + reliability_shown ,
    data = group_predic_w_morality_0_control,
    fixef = fixef_morality_group_predic, cluster = "group_id", stratum = "stratum_id", lasso = TRUE, lasso_options = lasso_options_morality
  )
) 


morality_models_sob %>%
  tex_export(
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "reliability", "video", "group_control", "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
    dep_means = list("Mean: No discussion (private)" = "discuss_type == 'control'")
  )


morality_models_sob[[1]] %>% get_coeff("morality_rating_trans") %>% times_100 %>%  write_stat("outputs/stats/morality_rating_trans_sob.tex", digits = 0)

# Get p-value
morality_models_sob[[1]] %>% get_p_val("morality_rating_trans") %>% write_p_val("outputs/stats/morality_rating_trans_sob_p.tex")